Chapter 8 HMSC analysis
8.2 Variance partitioning
# Compute variance partitioning
varpart=computeVariancePartitioning(m)
varpart$vals %>%
as.data.frame() %>%
rownames_to_column(var="variable") %>%
pivot_longer(!variable, names_to = "genome", values_to = "value") %>%
mutate(variable=factor(variable, levels=rev(c("origin","sex","logseqdepth","Random: location")))) %>%
group_by(variable) %>%
summarise(mean=mean(value)*100,sd=sd(value)*100) %>%
tt()| variable | mean | sd |
|---|---|---|
| Random: location | 37.900015 | 25.317903 |
| logseqdepth | 56.110626 | 25.796874 |
| sex | 4.937460 | 5.612719 |
| origin | 1.051899 | 1.282563 |
# Basal tree
varpart_tree <- genome_tree
#Varpart table
varpart_table <- varpart$vals %>%
as.data.frame() %>%
rownames_to_column(var="variable") %>%
pivot_longer(!variable, names_to = "genome", values_to = "value") %>%
mutate(genome=factor(genome, levels=rev(varpart_tree$tip.label))) %>%
mutate(variable=factor(variable, levels=rev(c("origin","sex","logseqdepth","Random: location"))))
#Phylums
phylum_colors <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
mutate(phylum=str_remove_all(phylum, "p__"))%>%
right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
filter(genome %in% varpart_tree$tip.label) %>%
arrange(match(genome, varpart_tree$tip.label)) %>%
mutate(phylum = factor(phylum, levels = unique(phylum))) %>%
column_to_rownames(var = "genome") %>%
select(phylum)
colors_alphabetic <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
mutate(phylum=str_remove_all(phylum, "p__"))%>%
right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
filter(genome %in% varpart_tree$tip.label) %>%
arrange(match(genome, varpart_tree$tip.label)) %>%
select(phylum, colors) %>%
unique() %>%
arrange(phylum) %>%
select(colors) %>%
pull()
# Basal ggtree
varpart_tree <- varpart_tree %>%
force.ultrametric(.,method="extend") %>%
ggtree(., size = 0.3)***************************************************************
* Note: *
* force.ultrametric does not include a formal method to *
* ultrametricize a tree & should only be used to coerce *
* a phylogeny that fails is.ultrametric due to rounding -- *
* not as a substitute for formal rate-smoothing methods. *
***************************************************************
# Add phylum colors next to the tree tips
varpart_tree <- gheatmap(varpart_tree, phylum_colors, offset=-0.2, width=0.1, colnames=FALSE) +
scale_fill_manual(values=colors_alphabetic)+
labs(fill="Phylum")
#Reset fill scale to use a different colour profile in the heatmap
varpart_tree <- varpart_tree + new_scale_fill()
# Add variance stacked barplot
vertical_tree <- varpart_tree +
scale_fill_manual(values=c("#506a96","#cccccc","#be3e2b","#f6de6c"))+
geom_fruit(
data=varpart_table,
geom=geom_bar,
mapping = aes(x=value, y=genome, fill=variable, group=variable),
pwidth = 2,
offset = 0.05,
width= 1,
orientation="y",
stat="identity")+
labs(fill="Variable")
vertical_tree8.3 Posterior estimates
# Select desired support threshold
support=0.9
negsupport=1-support
# Basal tree
postestimates_tree <- genome_tree
# Posterior estimate table
post_beta <- getPostEstimate(hM=m, parName="Beta")$support %>%
as.data.frame() %>%
mutate(variable=m$covNames) %>%
pivot_longer(!variable, names_to = "genome", values_to = "value") %>%
mutate(genome=factor(genome, levels=rev(postestimates_tree$tip.label))) %>%
mutate(value = case_when(
value >= support ~ "Positive",
value <= negsupport ~ "Negative",
TRUE ~ "Neutral")) %>%
mutate(value=factor(value, levels=c("Positive","Neutral","Negative"))) %>%
pivot_wider(names_from = variable, values_from = value) %>%
#select(genome,sp_vulgaris,area_semi,area_urban,sp_vulgarisxarea_semi,sp_vulgarisxarea_urban,season_spring,season_winter,sp_vulgarisxseason_spring,sp_vulgarisxseason_winter) %>%
column_to_rownames(var="genome")
#Phylums
phylum_colors <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
mutate(phylum=str_remove_all(phylum, "p__")) %>%
right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
filter(genome %in% postestimates_tree$tip.label) %>%
arrange(match(genome, postestimates_tree$tip.label)) %>%
mutate(phylum = factor(phylum, levels = unique(phylum))) %>%
column_to_rownames(var = "genome") %>%
select(phylum)
colors_alphabetic <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
mutate(phylum=str_remove_all(phylum, "p__")) %>%
right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
filter(genome %in% postestimates_tree$tip.label) %>%
arrange(match(genome, postestimates_tree$tip.label)) %>%
select(phylum, colors) %>%
unique() %>%
arrange(phylum) %>%
select(colors) %>%
pull()
# Basal ggtree
postestimates_tree <- postestimates_tree %>%
force.ultrametric(.,method="extend") %>%
ggtree(., size = 0.3)***************************************************************
* Note: *
* force.ultrametric does not include a formal method to *
* ultrametricize a tree & should only be used to coerce *
* a phylogeny that fails is.ultrametric due to rounding -- *
* not as a substitute for formal rate-smoothing methods. *
***************************************************************
#Add phylum colors next to the tree tips
postestimates_tree <- gheatmap(postestimates_tree, phylum_colors, offset=-0.2, width=0.1, colnames=FALSE) +
scale_fill_manual(values=colors_alphabetic)+
labs(fill="Phylum")
#Reset fill scale to use a different colour profile in the heatmap
postestimates_tree <- postestimates_tree + new_scale_fill()
# Add posterior significant heatmap
postestimates_tree <- gheatmap(postestimates_tree, post_beta, offset=0, width=0.5, colnames=TRUE, colnames_position="top",colnames_angle=90, colnames_offset_y=1, hjust=0) +
scale_fill_manual(values=c("#be3e2b","#f4f4f4","#b2b530"))+
labs(fill="Trend")
postestimates_tree +
vexpand(.25, 1) # expand top 8.4 Correlations
#Compute the residual correlation matrix
OmegaCor = computeAssociations(m)
# Refernece tree (for sorting genomes)
genome_tree_subset <- genome_tree %>%
keep.tip(., tip=m$spNames)
#Co-occurrence matrix at the animal level
supportLevel = 0.95
toPlot = ((OmegaCor[[1]]$support>supportLevel)
+ (OmegaCor[[1]]$support<(1-supportLevel))>0)*OmegaCor[[1]]$mean
matrix <- toPlot %>%
as.data.frame() %>%
rownames_to_column(var="genome1") %>%
pivot_longer(!genome1, names_to = "genome2", values_to = "cor") %>%
mutate(genome1= factor(genome1, levels=genome_tree_subset$tip.label)) %>%
mutate(genome2= factor(genome2, levels=genome_tree_subset$tip.label)) %>%
ggplot(aes(x = genome1, y = genome2, fill = cor)) +
geom_tile() +
scale_fill_gradient2(low = "#be3e2b",
mid = "#f4f4f4",
high = "#b2b530")+
theme_void()
htree <- genome_tree_subset %>%
force.ultrametric(.,method="extend") %>%
ggtree(.)***************************************************************
* Note: *
* force.ultrametric does not include a formal method to *
* ultrametricize a tree & should only be used to coerce *
* a phylogeny that fails is.ultrametric due to rounding -- *
* not as a substitute for formal rate-smoothing methods. *
***************************************************************
***************************************************************
* Note: *
* force.ultrametric does not include a formal method to *
* ultrametricize a tree & should only be used to coerce *
* a phylogeny that fails is.ultrametric due to rounding -- *
* not as a substitute for formal rate-smoothing methods. *
***************************************************************
#create composite figure
grid.arrange(grobs = list(matrix,vtree),
layout_matrix = rbind(c(2,1,1,1,1,1,1,1,1,1,1,1),
c(2,1,1,1,1,1,1,1,1,1,1,1),
c(2,1,1,1,1,1,1,1,1,1,1,1),
c(2,1,1,1,1,1,1,1,1,1,1,1),
c(2,1,1,1,1,1,1,1,1,1,1,1),
c(2,1,1,1,1,1,1,1,1,1,1,1),
c(2,1,1,1,1,1,1,1,1,1,1,1),
c(2,1,1,1,1,1,1,1,1,1,1,1),
c(2,1,1,1,1,1,1,1,1,1,1,1),
c(2,1,1,1,1,1,1,1,1,1,1,1),
c(2,1,1,1,1,1,1,1,1,1,1,1)))8.5 Predict responses
# Select modelchain of interest
load("hmsc/fit_model1_250_10.Rdata")
gradient = c("domestic","feral")
gradientlength = length(gradient)
#Treatment-specific gradient predictions
pred <- constructGradient(m,
focalVariable = "origin",
non.focalVariables = list(logseqdepth=list(1),location=list(1))) %>%
predict(m, Gradient = ., expected = TRUE) %>%
do.call(rbind,.) %>%
as.data.frame() %>%
mutate(origin=rep(gradient,1000)) %>%
pivot_longer(!origin,names_to = "genome", values_to = "value")# weights: 9 (4 variable)
initial value 101.072331
final value 91.392443
converged
8.5.0.1 Element level
elements_table <- genome_gifts %>%
to.elements(., GIFT_db) %>%
as.data.frame()
community_elements <- pred %>%
group_by(origin, genome) %>%
mutate(row_id = row_number()) %>%
pivot_wider(names_from = genome, values_from = value) %>%
ungroup() %>%
group_split(row_id) %>%
as.list() %>%
lapply(., FUN = function(x){x %>%
select(-row_id) %>%
column_to_rownames(var = "origin") %>%
as.data.frame() %>%
exp() %>%
t() %>%
tss() %>%
to.community(elements_table,.,GIFT_db) %>%
as.data.frame() %>%
rownames_to_column(var="origin")
})
calculate_slope <- function(x) {
lm_fit <- lm(unlist(x) ~ seq_along(unlist(x)))
coef(lm_fit)[2]
}
element_predictions <- map_dfc(community_elements, function(mat) {
mat %>%
column_to_rownames(var = "origin") %>%
t() %>%
as.data.frame() %>%
rowwise() %>%
mutate(slope = calculate_slope(c_across(everything()))) %>%
select(slope) }) %>%
t() %>%
as.data.frame() %>%
set_names(colnames(community_elements[[1]])[-1]) %>%
rownames_to_column(var="iteration") %>%
pivot_longer(!iteration, names_to="trait",values_to="value") %>%
group_by(trait) %>%
summarise(mean=mean(value),
p1 = quantile(value, probs = 0.1),
p9 = quantile(value, probs = 0.9),
positive_support = sum(value > 0)/1000,
negative_support = sum(value < 0)/1000) %>%
arrange(-positive_support)# Positively associated
element_predictions %>%
filter(mean >0) %>%
arrange(-positive_support) %>%
filter(positive_support>=0.9) %>%
tt()| trait | mean | p1 | p9 | positive_support | negative_support |
|---|---|---|---|---|---|
| D0205 | 0.012603387 | 2.593807e-03 | 0.022939556 | 0.954 | 0.046 |
| D0208 | 0.010227729 | 1.642595e-03 | 0.018507180 | 0.945 | 0.055 |
| D0906 | 0.003565801 | 1.749126e-04 | 0.007907557 | 0.928 | 0.072 |
| D0504 | 0.004396738 | 2.650007e-04 | 0.009408255 | 0.921 | 0.079 |
| B0103 | 0.009096237 | 8.776903e-04 | 0.017087166 | 0.916 | 0.084 |
| D0507 | 0.003697711 | 6.655942e-05 | 0.007228665 | 0.903 | 0.097 |
element_predictions %>%
filter(mean <0) %>%
arrange(-negative_support) %>%
filter(negative_support>=0.9) %>%
tt()| trait | mean | p1 | p9 | positive_support | negative_support |
|---|---|---|---|---|---|
| D0801 | -0.001672597 | -0.002483633 | -9.833235e-05 | 0.014 | 0.986 |
| D0802 | -0.001672597 | -0.002483633 | -9.833235e-05 | 0.014 | 0.986 |
| B0310 | -0.013442775 | -0.024190545 | -3.605897e-03 | 0.025 | 0.975 |
| D0517 | -0.005167475 | -0.008180822 | -1.341754e-03 | 0.025 | 0.975 |
| B0302 | -0.005193974 | -0.010525780 | -6.418008e-04 | 0.032 | 0.968 |
| B0709 | -0.002110743 | -0.003632081 | -5.607182e-04 | 0.034 | 0.966 |
| D0601 | -0.009391013 | -0.017719312 | -1.923462e-03 | 0.034 | 0.966 |
| D0611 | -0.004341066 | -0.009391699 | -2.372652e-04 | 0.038 | 0.962 |
| D0903 | -0.004341066 | -0.009391699 | -2.372652e-04 | 0.038 | 0.962 |
| B0219 | -0.004355929 | -0.009480552 | -2.632315e-04 | 0.039 | 0.961 |
| B0804 | -0.017049337 | -0.030998939 | -3.672770e-03 | 0.044 | 0.956 |
| D0807 | -0.004530684 | -0.008778695 | -6.943753e-04 | 0.044 | 0.956 |
| D0603 | -0.002011208 | -0.004002246 | -2.931153e-04 | 0.047 | 0.953 |
| D0817 | -0.005309855 | -0.010758020 | -5.734667e-04 | 0.051 | 0.949 |
| B0303 | -0.012060364 | -0.022094010 | -2.735578e-03 | 0.052 | 0.948 |
| D0610 | -0.003193577 | -0.005075296 | -8.978410e-04 | 0.054 | 0.946 |
| D0606 | -0.006029744 | -0.011505444 | -8.948160e-04 | 0.061 | 0.939 |
| B0603 | -0.016268750 | -0.032036510 | -2.680562e-03 | 0.063 | 0.937 |
| D0908 | -0.016042713 | -0.027618565 | -4.081974e-03 | 0.063 | 0.937 |
| B0214 | -0.021066765 | -0.037854042 | -4.079732e-03 | 0.076 | 0.924 |
| B0401 | -0.011800076 | -0.023901689 | -9.968034e-04 | 0.078 | 0.922 |
| D0816 | -0.006097811 | -0.012262606 | -5.143077e-04 | 0.079 | 0.921 |
| B0601 | -0.009436323 | -0.018855245 | -6.283849e-04 | 0.083 | 0.917 |
| D0508 | -0.003593430 | -0.007732534 | -2.235608e-04 | 0.084 | 0.916 |
| D0612 | -0.002002036 | -0.003174204 | -7.712861e-05 | 0.088 | 0.912 |
| B0204 | -0.016246634 | -0.032996834 | -1.386663e-03 | 0.090 | 0.910 |
| B0708 | -0.025619028 | -0.048674986 | -2.910586e-04 | 0.097 | 0.903 |
| B0309 | -0.007653490 | -0.015286326 | -2.801407e-05 | 0.099 | 0.901 |
positive <- element_predictions %>%
filter(mean >0) %>%
arrange(mean) %>%
filter(positive_support>=0.9) %>%
select(-negative_support) %>%
rename(support=positive_support)
negative <- element_predictions %>%
filter(mean <0) %>%
arrange(mean) %>%
filter(negative_support>=0.9) %>%
select(-positive_support) %>%
rename(support=negative_support)
bind_rows(positive,negative) %>%
left_join(GIFT_db,by=join_by(trait==Code_element)) %>%
mutate(trait=factor(trait,levels=c(rev(positive$trait),rev(negative$trait)))) %>%
ggplot(aes(x=mean, y=fct_rev(trait), xmin=p1, xmax=p9, color=Function)) +
geom_point() +
geom_errorbar() +
xlim(c(-0.04,0.04)) +
geom_vline(xintercept=0) +
scale_color_manual(values = c("#debc14","#440526","#dc7c17","#172742","#debc14","#440526","#dc7c17","#172742","#357379","#6c7e2c","#d8dc69","#774d35","#db717d")) +
theme_minimal() +
labs(x="Regression coefficient",y="Functional trait")8.5.0.2 Function level
functions_table <- elements_table %>%
to.functions(., GIFT_db) %>%
as.data.frame()
community_functions <- pred %>%
group_by(origin, genome) %>%
mutate(row_id = row_number()) %>%
pivot_wider(names_from = genome, values_from = value) %>%
ungroup() %>%
group_split(row_id) %>%
as.list() %>%
lapply(., FUN = function(x){x %>%
select(-row_id) %>%
column_to_rownames(var = "origin") %>%
as.data.frame() %>%
exp() %>%
t() %>%
tss() %>%
to.community(functions_table,.,GIFT_db) %>%
as.data.frame() %>%
rownames_to_column(var="origin")
})#max-min option
calculate_slope <- function(x) {
lm_fit <- lm(unlist(x) ~ seq_along(unlist(x)))
coef(lm_fit)[2]
}
function_predictions <- map_dfc(community_functions, function(mat) {
mat %>%
column_to_rownames(var = "origin") %>%
t() %>%
as.data.frame() %>%
rowwise() %>%
mutate(slope = calculate_slope(c_across(everything()))) %>%
select(slope) }) %>%
t() %>%
as.data.frame() %>%
set_names(colnames(community_functions[[1]])[-1]) %>%
rownames_to_column(var="iteration") %>%
pivot_longer(!iteration, names_to="trait",values_to="value") %>%
group_by(trait) %>%
summarise(mean=mean(value),
p1 = quantile(value, probs = 0.1),
p9 = quantile(value, probs = 0.9),
positive_support = sum(value > 0)/1000,
negative_support = sum(value < 0)/1000) %>%
arrange(-positive_support)
# Positively associated
function_predictions %>%
filter(mean >0) %>%
arrange(-positive_support) %>%
tt()| trait | mean | p1 | p9 | positive_support | negative_support |
|---|---|---|---|---|---|
| D02 | 0.0084245611 | -0.002708007 | 0.020025834 | 0.836 | 0.164 |
| B08 | 0.0074961511 | -0.003350089 | 0.018284003 | 0.791 | 0.209 |
| B01 | 0.0011568863 | -0.005731988 | 0.008379737 | 0.625 | 0.375 |
| S01 | 0.0007429401 | -0.011690216 | 0.013752273 | 0.570 | 0.430 |
# Negatively associated
function_predictions %>%
filter(mean <0) %>%
arrange(-negative_support) %>%
tt()| trait | mean | p1 | p9 | positive_support | negative_support |
|---|---|---|---|---|---|
| B03 | -1.112411e-02 | -0.0187246652 | -0.0039909543 | 0.032 | 0.968 |
| D08 | -1.214619e-03 | -0.0023722116 | -0.0002337339 | 0.034 | 0.966 |
| D06 | -3.261848e-03 | -0.0071802725 | 0.0001432296 | 0.108 | 0.892 |
| B04 | -8.359836e-03 | -0.0185309424 | 0.0011794391 | 0.135 | 0.865 |
| D07 | -1.206714e-02 | -0.0304543194 | 0.0040195356 | 0.182 | 0.818 |
| B06 | -6.715638e-03 | -0.0162987317 | 0.0033156299 | 0.188 | 0.812 |
| D05 | -2.065349e-03 | -0.0078384328 | 0.0033380053 | 0.209 | 0.791 |
| D03 | -4.331512e-03 | -0.0134588083 | 0.0035668705 | 0.224 | 0.776 |
| S03 | -1.020199e-02 | -0.0337194527 | 0.0150239871 | 0.247 | 0.753 |
| B02 | -4.000958e-03 | -0.0128546917 | 0.0041689091 | 0.263 | 0.737 |
| D09 | -2.283353e-03 | -0.0088140914 | 0.0048311247 | 0.312 | 0.688 |
| S02 | -4.751123e-03 | -0.0151394921 | 0.0031706467 | 0.325 | 0.675 |
| B07 | -4.054452e-03 | -0.0158206337 | 0.0080154534 | 0.331 | 0.669 |
| B09 | -2.002830e-05 | -0.0005311183 | 0.0004142376 | 0.361 | 0.639 |
| D01 | -4.373472e-04 | -0.0049969959 | 0.0041040520 | 0.410 | 0.590 |
| B10 | -1.442446e-05 | -0.0003132959 | 0.0002538415 | 0.473 | 0.527 |
positive <- function_predictions %>%
filter(mean >0) %>%
arrange(mean) %>%
filter(positive_support>=0.9) %>%
select(-negative_support) %>%
rename(support=positive_support)
negative <- function_predictions %>%
filter(mean <0) %>%
arrange(mean) %>%
filter(negative_support>=0.9) %>%
select(-positive_support) %>%
rename(support=negative_support)
bind_rows(positive,negative) %>%
left_join(GIFT_db,by=join_by(trait==Code_function)) %>%
mutate(trait=factor(trait,levels=c(rev(positive$trait),rev(negative$trait)))) %>%
ggplot(aes(x=mean, y=fct_rev(trait), xmin=p1, xmax=p9, color=Function)) +
geom_point() +
geom_errorbar() +
xlim(c(-0.02,0.02)) +
geom_vline(xintercept=0) +
scale_color_manual(values = c("#debc14","#440526","#dc7c17","#172742","#debc14","#440526","#dc7c17","#172742","#357379","#6c7e2c","#d8dc69","#774d35","#db717d")) +
theme_minimal() +
labs(x="Regression coefficient",y="Functional trait")